Prototype differential analsys

Tools to compute and compare table or view summaries


In [1]:
import re
import pandas as pd
import BQhelper as bq
import inspectTable

bq.project = "mlab-sandbox"

In [3]:
# Replicate/audit inventories from 2020/02/28

# measurement-lab.ndt.downloads as Inventory_measurement_lab_ndt_downloads
# measurement-lab.ndt.uploads AS Inventory_measurement_lab_ndt_uploads
# mlab-oti.base_tables.ndt AS Inventory_mlab_oit_base_tables

basicIQ="""
WITH

CurrTab AS (
    SELECT
      CAST(log_time AS DATE) as test_date,
      REPLACE(REGEXP_EXTRACT(task_filename, "-(mlab[1-4]-[a-z][a-z][a-z][0-9][0-9t])-"), "-", ".") AS hostname,
      connection_spec.data_direction AS direction,
      COUNT( DISTINCT test_id) AS current_tests
    FROM `{table}`
    GROUP BY test_date, hostname, direction
),

RawData AS (
SELECT * FROM {table} LIMIT 100
),

deltaTab AS (
    SELECT *, CurrTab.current_tests-legacy_tests AS delta
    FROM CurrTab
    JOIN {archivedTable} AS archive
    USING (test_date, hostname, direction)
    WHERE CurrTab.current_tests != archive.legacy_tests
)


# select * FROM DeltaTab LIMIT 10
SELECT
    SUM(legacy_tests) AS legacy,
    SUM(current_tests) AS cur,
    SUM(delta) AS netD,
    SUM(delta) / SUM(legacy_tests) AS ratio
FROM deltaTab
"""

r=bq.DataFrameQuery(basicIQ, table='mlab-oti.base_tables.ndt', archivedTable='mlab-sandbox.mattmathis.Inventory_mlab_oit_base_tables')
print (r)


     legacy       cur    netD    ratio
0  54566100  54443863 -122237 -0.00224